This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

install.packages('devtools')
trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-arm64/contrib/4.2/devtools_2.4.5.tgz'
Content type 'application/x-gzip' length 421790 bytes (411 KB)
==================================================
downloaded 411 KB

The downloaded binary packages are in
    /var/folders/4q/4mb5qhz51qdgffngk4lqzs540000gn/T//RtmpE16bnd/downloaded_packages
install.packages('Seurat')
trying URL 'https://cran.rstudio.com/bin/macosx/big-sur-arm64/contrib/4.2/Seurat_5.0.1.tgz'
Content type 'application/x-gzip' length 4150196 bytes (4.0 MB)
==================================================
downloaded 4.0 MB

The downloaded binary packages are in
    /var/folders/4q/4mb5qhz51qdgffngk4lqzs540000gn/T//RtmpE16bnd/downloaded_packages
library(Seurat)
Warning: package ‘Seurat’ was built under R version 4.2.3Loading required package: SeuratObject
Warning: package ‘SeuratObject’ was built under R version 4.2.3Loading required package: sp
Warning: package ‘sp’ was built under R version 4.2.3
Attaching package: ‘sp’

The following object is masked from ‘package:IRanges’:

    %over%


Attaching package: ‘SeuratObject’

The following object is masked from ‘package:SummarizedExperiment’:

    Assays

The following object is masked from ‘package:GenomicRanges’:

    intersect

The following object is masked from ‘package:GenomeInfoDb’:

    intersect

The following object is masked from ‘package:IRanges’:

    intersect

The following object is masked from ‘package:S4Vectors’:

    intersect

The following object is masked from ‘package:BiocGenerics’:

    intersect

The following object is masked from ‘package:base’:

    intersect

Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

Attaching package: ‘Seurat’

The following object is masked from ‘package:SummarizedExperiment’:

    Assays
library(dplyr)
Warning: package ‘dplyr’ was built under R version 4.2.3
Attaching package: ‘dplyr’

The following object is masked from ‘package:Biobase’:

    combine

The following object is masked from ‘package:matrixStats’:

    count

The following objects are masked from ‘package:GenomicRanges’:

    intersect, setdiff, union

The following object is masked from ‘package:GenomeInfoDb’:

    intersect

The following objects are masked from ‘package:IRanges’:

    collapse, desc, intersect, setdiff, slice, union

The following objects are masked from ‘package:S4Vectors’:

    first, intersect, rename, setdiff, setequal, union

The following objects are masked from ‘package:BiocGenerics’:

    combine, intersect, setdiff, union

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(patchwork)
library(ggplot2)
#Loading matrices from cellranger output
library(Matrix)
Warning: package ‘Matrix’ was built under R version 4.2.3
Attaching package: ‘Matrix’

The following object is masked from ‘package:S4Vectors’:

    expand
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB1-Jan2019/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB1_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB1_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB1_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB1 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB1")
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
HB1
An object of class Seurat 
18626 features across 4340 samples within 1 assay 
Active assay: RNA (18626 features, 0 variable features)
 1 layer present: counts
#Loading matrices from cellranger output
library(Matrix)
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB4-Oct2020/outs/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB4_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB4_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB4_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB4 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB4")
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
HB4
An object of class Seurat 
19985 features across 7856 samples within 1 assay 
Active assay: RNA (19985 features, 0 variable features)
 1 layer present: counts
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB6-Oct2020/outs/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB6_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB6_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB6_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB6 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB6")
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
HB6
An object of class Seurat 
21067 features across 4490 samples within 1 assay 
Active assay: RNA (21067 features, 0 variable features)
 1 layer present: counts
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB7-Oct2020/outs/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB7_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB7_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB7_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB7 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB7")
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
HB7
An object of class Seurat 
20887 features across 8378 samples within 1 assay 
Active assay: RNA (20887 features, 0 variable features)
 1 layer present: counts
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB12-Jul2021/outs/"
barcode.path <- paste0(matrix_dir, "HB12_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB12_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB12_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB12 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB12")
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
HB12
An object of class Seurat 
18040 features across 2318 samples within 1 assay 
Active assay: RNA (18040 features, 0 variable features)
 1 layer present: counts
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB13-Jul2021/outs/"
barcode.path <- paste0(matrix_dir, "HB13_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB13_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB13_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB13 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB13")
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
HB13
An object of class Seurat 
18633 features across 3395 samples within 1 assay 
Active assay: RNA (18633 features, 0 variable features)
 1 layer present: counts
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB14-Apr2022/"
barcode.path <- paste0(matrix_dir, "HB14_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB14_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB14_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB14 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB14")
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
HB14
An object of class Seurat 
16219 features across 4870 samples within 1 assay 
Active assay: RNA (16219 features, 0 variable features)
 1 layer present: counts
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB15-2/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB15_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB15_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB15_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB15 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB15")
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
HB15
An object of class Seurat 
18954 features across 2704 samples within 1 assay 
Active assay: RNA (18954 features, 0 variable features)
 1 layer present: counts
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB16/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB16_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB16_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB16_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB16 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB16")
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
HB16
An object of class Seurat 
18655 features across 2869 samples within 1 assay 
Active assay: RNA (18655 features, 0 variable features)
 1 layer present: counts
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB17/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB17_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB17_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB17_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB17 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB16")
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
HB17
An object of class Seurat 
18919 features across 1420 samples within 1 assay 
Active assay: RNA (18919 features, 0 variable features)
 1 layer present: counts
# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB1), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB1, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB1, slot = 'counts'))
Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
Please use the `layer` argument instead.
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB1[['percent.mito']] <- percent.mito
rb.genes <- rownames(HB1)[grep("^RP[SL]",rownames(HB1))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB1, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB1, slot = 'counts'))
HB1[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB1, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB4), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB4, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB4, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB4[['percent.mito']] <- percent.mito
rb.genes <- rownames(HB4)[grep("^RP[SL]",rownames(HB4))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB4, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB4, slot = 'counts'))
HB4[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB4, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB6), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB6, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB6, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB6[['percent.mito']] <- percent.mito
rb.genes <- rownames(HB6)[grep("^RP[SL]",rownames(HB6))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB6, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB6, slot = 'counts'))
HB6[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB6, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB7), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB7, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB7, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB7[['percent.mito']] <- percent.mito
rb.genes <- rownames(HB7)[grep("^RP[SL]",rownames(HB7))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB7, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB7, slot = 'counts'))
HB7[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB7, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB12), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB12, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB12, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB12[['percent.mito']] <- percent.mito
rb.genes <- rownames(HB12)[grep("^RP[SL]",rownames(HB12))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB12, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB12, slot = 'counts'))
HB12[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB12, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB13), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB13, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB13, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB13[['percent.mito']] <- percent.mito
rb.genes <- rownames(HB13)[grep("^RP[SL]",rownames(HB13))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB13, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB13, slot = 'counts'))
HB13[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB13, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB14), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB14, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB14, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB14[['percent.mito']] <- percent.mito
rb.genes <- rownames(HB14)[grep("^RP[SL]",rownames(HB14))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB14, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB14, slot = 'counts'))
HB14[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB14, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB15), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB15, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB15, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB15[['percent.mito']] <- percent.mito
rb.genes <- rownames(HB15)[grep("^RP[SL]",rownames(HB15))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB15, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB15, slot = 'counts'))
HB15[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB15, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB16), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB16, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB16, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB16[['percent.mito']] <- percent.mito
rb.genes <- rownames(HB16)[grep("^RP[SL]",rownames(HB16))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB16, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB16, slot = 'counts'))
HB16[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB16, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB17), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB17, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB17, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB17[['percent.mito']] <- percent.mito
rb.genes <- rownames(HB17)[grep("^RP[SL]",rownames(HB17))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB17, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB17, slot = 'counts'))
HB17[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB17, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# Set up HB1 object
HB1 <- subset(x = HB1, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB4 object
HB4 <- subset(x = HB4, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB6 object
HB6 <- subset(x = HB6, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB7 object
HB7 <- subset(x = HB7, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB12 object
HB12 <- subset(x = HB12, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB13 object
HB13 <- subset(x = HB13, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB14 object
HB14 <- subset(x = HB14, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB15 object
HB15 <- subset(x = HB15, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB16 object
HB16 <- subset(x = HB16, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB17 object
HB17 <- subset(x = HB17, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)
HB1 <- NormalizeData(HB1)
Normalizing layer: counts
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB1 <- FindVariableFeatures(HB1, selection.method = "vst", nfeatures = 2000)
Finding variable features for layer counts
Calculating gene variances
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB4 <- NormalizeData(HB4)
Normalizing layer: counts
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB4 <- FindVariableFeatures(HB4, selection.method = "vst", nfeatures = 2000)
Finding variable features for layer counts
Calculating gene variances
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB6 <- NormalizeData(HB6)
Normalizing layer: counts
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB6 <- FindVariableFeatures(HB6, selection.method = "vst", nfeatures = 2000)
Finding variable features for layer counts
Calculating gene variances
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB7 <- NormalizeData(HB7)
Normalizing layer: counts
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB7 <- FindVariableFeatures(HB7, selection.method = "vst", nfeatures = 2000)
Finding variable features for layer counts
Calculating gene variances
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB12 <- NormalizeData(HB12)
Normalizing layer: counts
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB12 <- FindVariableFeatures(HB12, selection.method = "vst", nfeatures = 2000)
Finding variable features for layer counts
Calculating gene variances
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB13 <- NormalizeData(HB13)
Normalizing layer: counts
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB13 <- FindVariableFeatures(HB13, selection.method = "vst", nfeatures = 2000)
Finding variable features for layer counts
Calculating gene variances
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB14 <- NormalizeData(HB14)
Normalizing layer: counts
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB14 <- FindVariableFeatures(HB14, selection.method = "vst", nfeatures = 2000)
Finding variable features for layer counts
Calculating gene variances
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB15 <- NormalizeData(HB15)
Normalizing layer: counts
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB15 <- FindVariableFeatures(HB15, selection.method = "vst", nfeatures = 2000)
Finding variable features for layer counts
Calculating gene variances
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB16 <- NormalizeData(HB16)
Normalizing layer: counts
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB16 <- FindVariableFeatures(HB16, selection.method = "vst", nfeatures = 2000)
Finding variable features for layer counts
Calculating gene variances
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB17 <- NormalizeData(HB17)
Normalizing layer: counts
Performing log-normalization
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB17 <- FindVariableFeatures(HB17, selection.method = "vst", nfeatures = 2000)
Finding variable features for layer counts
Calculating gene variances
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
HB1 <- AddMetaData(HB1, metadata="A", col.name="HB")
HB4 <- AddMetaData(HB4, metadata="B", col.name="HB")
HB6 <- AddMetaData(HB6, metadata="C", col.name="HB")
HB7 <- AddMetaData(HB7, metadata="D", col.name="HB")
HB12 <- AddMetaData(HB12, metadata="E", col.name="HB")
HB13 <- AddMetaData(HB13, metadata="F", col.name="HB")
HB14 <- AddMetaData(HB14, metadata="G", col.name="HB")
HB15 <- AddMetaData(HB15, metadata="H", col.name="HB")
HB16 <- AddMetaData(HB16, metadata="I", col.name="HB")
HB17 <- AddMetaData(HB17, metadata="J", col.name="HB")
HBbig.list <- list(HB1, HB4, HB6, HB7, HB12, HB13, HB14, HB15, HB16, HB17) 
features <- SelectIntegrationFeatures(object.list = HBbig.list)
HB.anchors <- FindIntegrationAnchors(object.list = HBbig.list, anchor.features = features)
Warning: Some cell names are duplicated across objects provided. Renaming to enforce unique cell names.Scaling features for provided objects

  |                                                  | 0 % ~calculating  
  |+++++                                             | 10% ~03s          
  |++++++++++                                        | 20% ~03s          
  |+++++++++++++++                                   | 30% ~03s          
  |++++++++++++++++++++                              | 40% ~03s          
  |+++++++++++++++++++++++++                         | 50% ~02s          
  |++++++++++++++++++++++++++++++                    | 60% ~01s          
  |+++++++++++++++++++++++++++++++++++               | 70% ~01s          
  |++++++++++++++++++++++++++++++++++++++++          | 80% ~01s          
  |+++++++++++++++++++++++++++++++++++++++++++++     | 90% ~00s          
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s  
Finding all pairwise anchors

  |                                                  | 0 % ~calculating  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 15330 anchors
Filtering anchors
    Retained 852 anchors

  |++                                                | 2 % ~02h 11m 02s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 11964 anchors
Filtering anchors
    Retained 1125 anchors

  |+++                                               | 4 % ~01h 41m 39s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 12869 anchors
Filtering anchors
    Retained 5233 anchors

  |++++                                              | 7 % ~02h 16m 06s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 15112 anchors
Filtering anchors
    Retained 2234 anchors

  |+++++                                             | 9 % ~05h 21m 19s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 17375 anchors
Filtering anchors
    Retained 5782 anchors

  |++++++                                            | 11% ~07h 10m 17s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 12929 anchors
Filtering anchors
    Retained 4177 anchors

  |+++++++                                           | 13% ~06h 08m 53s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 8823 anchors
Filtering anchors
    Retained 3033 anchors

  |++++++++                                          | 16% ~05h 13m 42s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 9314 anchors
Filtering anchors
    Retained 2724 anchors

  |+++++++++                                         | 18% ~04h 35m 58s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 8407 anchors
Filtering anchors
    Retained 2565 anchors

  |++++++++++                                        | 20% ~04h 02m 46s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 9194 anchors
Filtering anchors
    Retained 4384 anchors

  |++++++++++++                                      | 22% ~03h 39m 16s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 10113 anchors
Filtering anchors
    Retained 1410 anchors

  |+++++++++++++                                     | 24% ~03h 17m 41s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 11350 anchors
Filtering anchors
    Retained 2593 anchors

  |++++++++++++++                                    | 27% ~03h 02m 34s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 9320 anchors
Filtering anchors
    Retained 3586 anchors

  |+++++++++++++++                                   | 29% ~02h 46m 33s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 11471 anchors
Filtering anchors
    Retained 2469 anchors

  |++++++++++++++++                                  | 31% ~02h 35m 27s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 7356 anchors
Filtering anchors
    Retained 1361 anchors

  |+++++++++++++++++                                 | 33% ~02h 21m 44s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 13723 anchors
Filtering anchors
    Retained 2748 anchors

  |++++++++++++++++++                                | 36% ~02h 11m 46s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 16190 anchors
Filtering anchors
    Retained 2541 anchors

  |+++++++++++++++++++                               | 38% ~02h 05m 29s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 12306 anchors
Filtering anchors
    Retained 2490 anchors

  |++++++++++++++++++++                              | 40% ~01h 56m 59s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 15530 anchors
Filtering anchors
    Retained 4684 anchors

  |++++++++++++++++++++++                            | 42% ~01h 51m 42s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 8753 anchors
Filtering anchors
    Retained 2617 anchors

  |+++++++++++++++++++++++                           | 44% ~01h 43m 10s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 10615 anchors
Filtering anchors
    Retained 2483 anchors

  |++++++++++++++++++++++++                          | 47% ~01h 35m 47s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 9853 anchors
Filtering anchors
    Retained 2093 anchors

  |+++++++++++++++++++++++++                         | 49% ~01h 28m 50s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 10552 anchors
Filtering anchors
    Retained 3120 anchors

  |++++++++++++++++++++++++++                        | 51% ~01h 23m 20s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 9165 anchors
Filtering anchors
    Retained 3685 anchors

  |+++++++++++++++++++++++++++                       | 53% ~01h 17m 13s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 10302 anchors
Filtering anchors
    Retained 5874 anchors

  |++++++++++++++++++++++++++++                      | 56% ~01h 12m 23s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 7099 anchors
Filtering anchors
    Retained 2353 anchors

  |+++++++++++++++++++++++++++++                     | 58% ~01h 06m 34s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 8095 anchors
Filtering anchors
    Retained 3538 anchors

  |++++++++++++++++++++++++++++++                    | 60% ~01h 01m 15s  
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 10017 anchors
Filtering anchors
    Retained 2584 anchors

  |++++++++++++++++++++++++++++++++                  | 62% ~56m 32s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 9602 anchors
Filtering anchors
    Retained 2918 anchors

  |+++++++++++++++++++++++++++++++++                 | 64% ~52m 01s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 10774 anchors
Filtering anchors
    Retained 2359 anchors

  |++++++++++++++++++++++++++++++++++                | 67% ~48m 15s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 9160 anchors
Filtering anchors
    Retained 2575 anchors

  |+++++++++++++++++++++++++++++++++++               | 69% ~44m 07s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 10477 anchors
Filtering anchors
    Retained 4489 anchors

  |++++++++++++++++++++++++++++++++++++              | 71% ~40m 37s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 6997 anchors
Filtering anchors
    Retained 2476 anchors

  |+++++++++++++++++++++++++++++++++++++             | 73% ~36m 35s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 7940 anchors
Filtering anchors
    Retained 2744 anchors

  |++++++++++++++++++++++++++++++++++++++            | 76% ~32m 49s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 9826 anchors
Filtering anchors
    Retained 1899 anchors

  |+++++++++++++++++++++++++++++++++++++++           | 78% ~29m 20s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 7564 anchors
Filtering anchors
    Retained 2770 anchors

  |++++++++++++++++++++++++++++++++++++++++          | 80% ~25m 51s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 5830 anchors
Filtering anchors
    Retained 2276 anchors

  |++++++++++++++++++++++++++++++++++++++++++        | 82% ~22m 29s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 5818 anchors
Filtering anchors
    Retained 2613 anchors

  |+++++++++++++++++++++++++++++++++++++++++++       | 84% ~19m 23s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 5534 anchors
Filtering anchors
    Retained 2733 anchors

  |++++++++++++++++++++++++++++++++++++++++++++      | 87% ~16m 17s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 5891 anchors
Filtering anchors
    Retained 4267 anchors

  |+++++++++++++++++++++++++++++++++++++++++++++     | 89% ~13m 23s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 4675 anchors
Filtering anchors
    Retained 2188 anchors

  |++++++++++++++++++++++++++++++++++++++++++++++    | 91% ~10m 29s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 5289 anchors
Filtering anchors
    Retained 3146 anchors

  |+++++++++++++++++++++++++++++++++++++++++++++++   | 93% ~07m 43s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 5730 anchors
Filtering anchors
    Retained 1875 anchors

  |++++++++++++++++++++++++++++++++++++++++++++++++  | 96% ~05m 03s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 4996 anchors
Filtering anchors
    Retained 3309 anchors

  |+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~02m 29s      
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
    Found 5018 anchors
Filtering anchors
    Retained 3013 anchors

  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=01h 49m 24s
# this command creates an 'integrated' data assay
HB.combined <- IntegrateData(anchorset = HB.anchors)
Merging dataset 10 into 4
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 8 into 4 10
Extracting anchors for merged samples
Finding integration vectors
Warning: Different cells in new layer data than already exists for scale.dataFinding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 5 into 1
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 3 into 2
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 9 into 1 5
Extracting anchors for merged samples
Finding integration vectors
Warning: Different cells in new layer data than already exists for scale.dataFinding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 7 into 4 10 8
Extracting anchors for merged samples
Finding integration vectors
Warning: Different cells in new layer data than already exists for scale.dataFinding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 6 into 2 3
Extracting anchors for merged samples
Finding integration vectors
Warning: Different cells in new layer data than already exists for scale.dataFinding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 2 3 6 into 4 10 8 7
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
Merging dataset 1 5 9 into 4 10 8 7 2 3 6
Extracting anchors for merged samples
Finding integration vectors
Finding integration vector weights
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Integrating data
# specify that we will perform downstream analysis on the corrected data note that the original
# unmodified data still resides in the 'RNA' assay
DefaultAssay(HB.combined) <- "integrated"

# Run the standard workflow for visualization and clustering
HB.combined <- ScaleData(HB.combined, vars.to.regress = c("nCount_RNA", "percent.mito", "percent.ribo"), verbose = FALSE)
HB.combined <- RunPCA(HB.combined, verbose = FALSE)
HB.combined <- RunUMAP(HB.combined, reduction = "pca", dims = 1:30, set.seed(123))
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session16:49:05 UMAP embedding parameters a = 0.9922 b = 1.112
Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
16:49:05 Read 40666 rows and found 30 numeric columns
16:49:05 Using Annoy for neighbor search, n_neighbors = 30
Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
16:49:05 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
16:49:08 Writing NN index file to temp file /var/folders/4q/4mb5qhz51qdgffngk4lqzs540000gn/T//RtmpE16bnd/file4201267475d
16:49:09 Searching Annoy index using 1 thread, search_k = 3000
16:49:22 Annoy recall = 100%
16:49:24 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
16:49:25 Initializing from normalized Laplacian + noise (using RSpectra)
16:49:26 Commencing optimization for 200 epochs, with 1868092 positive edges
Using method 'umap'
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
16:49:48 Optimization finished
HB.combined <- FindNeighbors(HB.combined, reduction = "pca", dims = 1:30)
Computing nearest neighbor graph
Computing SNN
HB.combined <- FindClusters(HB.combined, resolution = 0.4)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 40666
Number of edges: 1494520

Running Louvain algorithm...
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8948
Number of communities: 12
Elapsed time: 14 seconds
# Visualization
DimPlot(HB.combined, reduction = "umap", label = TRUE, repel = TRUE)

DimPlot(HB.combined, reduction = "umap", split.by = "HB", ncol = 4)

# How do I create a UMAP plot where cells are colored by replicate?  First, store the current
# identities in a new column of meta.data called CellType
HB.combined$CellType <- Idents(HB.combined)
# Next, switch the identity class of all cells to reflect replicate ID
Idents(HB.combined) <- "seurat_clusters"
DimPlot(HB.combined, reduction = "umap", label = TRUE)

<!-- rnb-text-end -->


<!-- rnb-chunk-begin -->


<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxucDIgPC0gRGltUGxvdChIQi5jb21iaW5lZCwgcmVkdWN0aW9uID0gXCJ1bWFwXCIsIGxhYmVsID0gVFJVRSwgcmVwZWwgPSBUUlVFKVxucDJcbmBgYCJ9 -->

```r
p2 <- DimPlot(HB.combined, reduction = "umap", label = TRUE, repel = TRUE)
p2
saveRDS(HB.combined, file = "/Users/pengwu/Documents/Experiments/scRNAseq/HBcombined.rds")
DimPlot(object = HB.combined, reduction = 'umap', split.by = "HB", width = 2000, height = 1500, units = "px", res = 300)
tiff(file = "/Volumes/backups/Peng/Research/scRNAseq/HBcombined.tiff", width = 2000, height = 1500, units = "px", res = 300)
dev.off()
# For performing differential expression after integration, we switch back to the original data
DefaultAssay(HB.combined) <- "RNA"
AllClust.markers <- FindAllMarkers(HB.combined, grouping.var = "HB", verbose = FALSE)
write.csv(AllClust.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/allclusters.csv",row.names = TRUE)
cluster0.markers <- FindConservedMarkers(HB.combined, ident.1 = 0, grouping.var = "HB", verbose = FALSE)
write.csv(cluster0.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster0.csv",row.names = TRUE)
cluster1.markers <- FindConservedMarkers(HB.combined, ident.1 = 1, grouping.var = "HB", verbose = FALSE)
write.csv(cluster1.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster1.csv",row.names = TRUE)
cluster2.markers <- FindConservedMarkers(HB.combined, ident.1 = 2, grouping.var = "HB", verbose = FALSE)
write.csv(cluster2.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster2.csv",row.names = TRUE)
cluster3.markers <- FindConservedMarkers(HB.combined, ident.1 = 3, grouping.var = "HB", verbose = FALSE)
write.csv(cluster3.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster3.csv",row.names = TRUE)
cluster4.markers <- FindConservedMarkers(HB.combined, ident.1 = 4, grouping.var = "HB", verbose = FALSE)
write.csv(cluster4.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster4.csv",row.names = TRUE)
cluster5.markers <- FindConservedMarkers(HB.combined, ident.1 = 5, grouping.var = "HB", verbose = FALSE)
write.csv(cluster5.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster5.csv",row.names = TRUE)
cluster6.markers <- FindConservedMarkers(HB.combined, ident.1 = 6, grouping.var = "HB", verbose = FALSE)
write.csv(cluster6.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster6.csv",row.names = TRUE)
cluster7.markers <- FindConservedMarkers(HB.combined, ident.1 =7, grouping.var = "HB", verbose = FALSE)
write.csv(cluster7.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster7.csv",row.names = TRUE)
cluster8.markers <- FindConservedMarkers(HB.combined, ident.1 =8, grouping.var = "HB", verbose = FALSE)
write.csv(cluster8.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster8.csv",row.names = TRUE)
cluster9.markers <- FindConservedMarkers(HB.combined, ident.1 =9, grouping.var = "HB", verbose = FALSE)
write.csv(cluster9.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster9.csv",row.names = TRUE)
cluster10.markers <- FindConservedMarkers(HB.combined, ident.1 = 10, grouping.var = "HB", verbose = FALSE)
write.csv(cluster10.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster10.csv",row.names = TRUE)
cluster11.markers <- FindConservedMarkers(HB.combined, ident.1 = 11, grouping.var = "HB", verbose = FALSE)
write.csv(cluster11.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster11.csv",row.names = TRUE)
# Example for plotting gene features
FeaturePlot(object = HB.combined, features = c("FGF19", "KLB", "FGFR4"))
tiff(file = "FGF.tiff", width = 1500, height = 1500, units = "px", res = 300)
dev.off()
# Example for calculating gene signatures
FetvNorm <- read.csv('/Users/pengwu/Documents/Experiments/RNAseq/180805Final/FetvNorm_Top50.csv', header=F, sep=",")
FetvNorm <- as.matrix(FetvNorm)
HB.combined <- AddModuleScore(HB.combined,
                  features = list(FetvNorm),
                  name="FetvNorm")
# Plot scores
FeaturePlot(HB.combined,
            features = c("FetvNorm1"), label = FALSE, repel = TRUE) +
            scale_colour_gradientn(colours = rev(brewer.pal(n = 11, name = "RdBu")))
# Example for violin plots
my_genes <- HB.combined[row.names(subset(fData(HB.combined), gene_short_name %in% c("AFP", "HNF4A", "KRT19", "SOX4"))),]

tiff(file = "AFPGenes_Violin.tiff", width = 1500, height = 1500, units = "px", res = 300)
p <- plot_genes_violin(my_genes, grouping = "cluster", ncol=2, min_expr=0.1,
  color_by = "cluster")
p + geom_jitter(shape=16, position=position_jitter(0.2))
dev.off()
# Dot plots - the size of the dot corresponds to the percentage of cells expressing the
# feature in each cluster. The color represents the average expression level
features.plot <- c("FGF1", "FGF2", "FGF3", "FGF4", "FGF7", "FGF8", "FGF9", "FGF10", "FGF11", "FGF12", "FGF13", "FGF14", "FGF17", "FGF18", "FGF19", "FGF20", "FGF21", "FGF22", "FGF23")
DotPlot(HB.combined, features = features.plot, group.by = "HB") + RotatedAxis()
# Dot plots - the size of the dot corresponds to the percentage of cells expressing the
# feature in each cluster. The color represents the average expression level
features.plot <- c("FGFR1", "FGFR2", "FGFR3", "FGFR4", "KLB", "EGFR", "MET", "IGF1R", "IGF2R", "INSR", "EGF", "HGF", "IGF1", "IGF2.1")
DotPlot(HB.combined, features = features.plot, group.by = "HB") + RotatedAxis()
umapCoord <- as.data.frame(Embeddings(object = HB.combined[["umap"]]))
umapCoordmat <- as.matrix(umapCoord)
# Monocle for pseudotime analyses
remotes::install_github('satijalab/seurat-wrappers')
library(monocle3)
library(Seurat)
library(SeuratWrappers)
library(patchwork)
library(dplyr)

set.seed(1234)
HB.monocle <- as.cell_data_set(HB.combined)
HB.monocle <- cluster_cells(HB.monocle, cluster_method = "louvain")

p1 <- plot_cells(HB.monocle, color_cells_by = "cluster", show_trajectory_graph = FALSE)
p2 <- plot_cells(HB.monocle, color_cells_by = "partition", show_trajectory_graph = FALSE)
wrap_plots(p1, p2)
integrated.sub <- subset(as.Seurat(HB.monocle, assay = NULL), monocle3_partitions == 1)
HB.monocle <- as.cell_data_set(integrated.sub)
HB.monocle <- learn_graph(HB.monocle, use_partition = TRUE, verbose = FALSE)
plot_cells(HB.monocle,
           color_cells_by = "cluster",
           label_groups_by_cluster=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE)
HB.monocle <- order_cells(HB.monocle, root_cells = colnames(HB.monocle[,clusters(HB.monocle) == 47]))
plot_cells(HB.monocle,
           color_cells_by = "pseudotime",
           group_cells_by = "cluster",
           label_cell_groups = FALSE,
           label_groups_by_cluster=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE,
           label_roots = FALSE,
           trajectory_graph_color = "grey60")

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

---
title: "Seurat"
output:
  html_document:
    df_print: paged
  html_notebook: default
  pdf_document: default
---

This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Cmd+Shift+Enter*. 

```{r}
install.packages('devtools')
install.packages('Seurat')
library(Seurat)
library(dplyr)
library(patchwork)
library(ggplot2)
```

```{r}
#Loading matrices from cellranger output
library(Matrix)
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB1-Jan2019/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB1_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB1_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB1_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
```
```{r}
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB1 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB1")
HB1
```

```{r}
#Loading matrices from cellranger output
library(Matrix)
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB4-Oct2020/outs/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB4_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB4_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB4_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
```

```{r}
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB4 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB4")
HB4
```

```{r}
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB6-Oct2020/outs/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB6_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB6_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB6_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
```

```{r}
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB6 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB6")
HB6
```

```{r}
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB7-Oct2020/outs/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB7_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB7_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB7_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
```

```{r}
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB7 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB7")
HB7
```
```{r}
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB12-Jul2021/outs/"
barcode.path <- paste0(matrix_dir, "HB12_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB12_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB12_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
```

```{r}
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB12 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB12")
HB12
```

```{r}
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB13-Jul2021/outs/"
barcode.path <- paste0(matrix_dir, "HB13_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB13_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB13_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
```

```{r}
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB13 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB13")
HB13
```
```{r}
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB14-Apr2022/"
barcode.path <- paste0(matrix_dir, "HB14_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB14_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB14_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
```

```{r}
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB14 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB14")
HB14
```
```{r}
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB15-2/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB15_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB15_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB15_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
```

```{r}
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB15 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB15")
HB15
```
```{r}
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB16/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB16_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB16_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB16_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
```

```{r}
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB16 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB16")
HB16
```
```{r}
matrix_dir = "/Users/pengwu/Documents/Research - Old/scRNAseq/HB17/filtered_feature_bc_matrix/"
barcode.path <- paste0(matrix_dir, "HB17_barcodes.tsv.gz")
features.path <- paste0(matrix_dir, "HB17_features.tsv.gz")
matrix.path <- paste0(matrix_dir, "HB17_matrix.mtx.gz")
mat <- readMM(file = matrix.path)
feature.names = read.delim(features.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
barcode.names = read.delim(barcode.path, 
                           header = FALSE,
                           stringsAsFactors = FALSE)
colnames(mat) = barcode.names$V1
rownames(mat) = make.unique(feature.names$V2)
```

```{r}
# Initialize the Seurat object with the raw (non-normalized data).  Keep all
# genes expressed in >= 3 cells (~0.1% of the data). Keep all cells with at
# least 200 detected genes
HB17 <- CreateSeuratObject(counts = mat, min.cells = 3, min.features = 200, project = "HB16")
HB17
```
```{r}
# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB1), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB1, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB1, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB1[['percent.mito']] <- percent.mito
```
```{r}
rb.genes <- rownames(HB1)[grep("^RP[SL]",rownames(HB1))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB1, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB1, slot = 'counts'))
HB1[['percent.ribo']] <- percent.ribo
```
```{r}
VlnPlot(object = HB1, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
```
```{r}
# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB4), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB4, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB4, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB4[['percent.mito']] <- percent.mito
```
```{r}
rb.genes <- rownames(HB4)[grep("^RP[SL]",rownames(HB4))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB4, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB4, slot = 'counts'))
HB4[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB4, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
```
```{r}
# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB6), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB6, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB6, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB6[['percent.mito']] <- percent.mito
```
```{r}
rb.genes <- rownames(HB6)[grep("^RP[SL]",rownames(HB6))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB6, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB6, slot = 'counts'))
HB6[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB6, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
```
```{r}
# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB7), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB7, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB7, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB7[['percent.mito']] <- percent.mito
```
```{r}
rb.genes <- rownames(HB7)[grep("^RP[SL]",rownames(HB7))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB7, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB7, slot = 'counts'))
HB7[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB7, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
```

```{r}
# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB12), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB12, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB12, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB12[['percent.mito']] <- percent.mito
```
```{r}
rb.genes <- rownames(HB12)[grep("^RP[SL]",rownames(HB12))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB12, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB12, slot = 'counts'))
HB12[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB12, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
```

```{r}
# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB13), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB13, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB13, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB13[['percent.mito']] <- percent.mito
```
```{r}
rb.genes <- rownames(HB13)[grep("^RP[SL]",rownames(HB13))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB13, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB13, slot = 'counts'))
HB13[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB13, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
```
```{r}
# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB14), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB14, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB14, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB14[['percent.mito']] <- percent.mito
```
```{r}
rb.genes <- rownames(HB14)[grep("^RP[SL]",rownames(HB14))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB14, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB14, slot = 'counts'))
HB14[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB14, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
```
```{r}
# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB15), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB15, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB15, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB15[['percent.mito']] <- percent.mito
```
```{r}
rb.genes <- rownames(HB15)[grep("^RP[SL]",rownames(HB15))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB15, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB15, slot = 'counts'))
HB15[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB15, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
```
```{r}
# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB16), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB16, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB16, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB16[['percent.mito']] <- percent.mito
```
```{r}
rb.genes <- rownames(HB16)[grep("^RP[SL]",rownames(HB16))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB16, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB16, slot = 'counts'))
HB16[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB16, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
```
```{r}
# The number of genes and UMIs (nGene and nUMI) are automatically calculated
# for every object by Seurat.  For non-UMI data, nUMI represents the sum of
# the non-normalized values within a cell We calculate the percentage of
# mitochondrial genes here and store it in percent.mito using AddMetaData.
# We use object@raw.data since this represents non-transformed and
# non-log-normalized counts The % of UMI mapping to MT-genes is a common
# scRNA-seq QC metric.
mito.features <- grep(pattern = "^MT-", x = rownames(x = HB17), value = TRUE)
percent.mito <- Matrix::colSums(x = GetAssayData(object = HB17, slot = 'counts')[mito.features, ]) / Matrix::colSums(x = GetAssayData(object = HB17, slot = 'counts'))
# The [[ operator can add columns to object metadata, and is a great place to stash QC stats
HB17[['percent.mito']] <- percent.mito
```
```{r}
rb.genes <- rownames(HB17)[grep("^RP[SL]",rownames(HB17))]
percent.ribo <- Matrix::colSums(x = GetAssayData(object = HB17, slot = 'counts')[rb.genes, ]) / Matrix::colSums(x = GetAssayData(object = HB17, slot = 'counts'))
HB17[['percent.ribo']] <- percent.ribo
VlnPlot(object = HB17, features = c("nFeature_RNA", "nCount_RNA", "percent.mito", "percent.ribo"), ncol = 4)
```
```{r}
# Set up HB1 object
HB1 <- subset(x = HB1, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB4 object
HB4 <- subset(x = HB4, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB6 object
HB6 <- subset(x = HB6, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB7 object
HB7 <- subset(x = HB7, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB12 object
HB12 <- subset(x = HB12, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB13 object
HB13 <- subset(x = HB13, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB14 object
HB14 <- subset(x = HB14, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB15 object
HB15 <- subset(x = HB15, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB16 object
HB16 <- subset(x = HB16, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)

# Set up HB17 object
HB17 <- subset(x = HB17, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mito < 0.15)
```


```{r}
HB1 <- NormalizeData(HB1)
HB1 <- FindVariableFeatures(HB1, selection.method = "vst", nfeatures = 2000)

HB4 <- NormalizeData(HB4)
HB4 <- FindVariableFeatures(HB4, selection.method = "vst", nfeatures = 2000)

HB6 <- NormalizeData(HB6)
HB6 <- FindVariableFeatures(HB6, selection.method = "vst", nfeatures = 2000)

HB7 <- NormalizeData(HB7)
HB7 <- FindVariableFeatures(HB7, selection.method = "vst", nfeatures = 2000)

HB12 <- NormalizeData(HB12)
HB12 <- FindVariableFeatures(HB12, selection.method = "vst", nfeatures = 2000)

HB13 <- NormalizeData(HB13)
HB13 <- FindVariableFeatures(HB13, selection.method = "vst", nfeatures = 2000)

HB14 <- NormalizeData(HB14)
HB14 <- FindVariableFeatures(HB14, selection.method = "vst", nfeatures = 2000)

HB15 <- NormalizeData(HB15)
HB15 <- FindVariableFeatures(HB15, selection.method = "vst", nfeatures = 2000)

HB16 <- NormalizeData(HB16)
HB16 <- FindVariableFeatures(HB16, selection.method = "vst", nfeatures = 2000)

HB17 <- NormalizeData(HB17)
HB17 <- FindVariableFeatures(HB17, selection.method = "vst", nfeatures = 2000)
```
```{r}
HB1 <- AddMetaData(HB1, metadata="A", col.name="HB")
HB4 <- AddMetaData(HB4, metadata="B", col.name="HB")
HB6 <- AddMetaData(HB6, metadata="C", col.name="HB")
HB7 <- AddMetaData(HB7, metadata="D", col.name="HB")
HB12 <- AddMetaData(HB12, metadata="E", col.name="HB")
HB13 <- AddMetaData(HB13, metadata="F", col.name="HB")
HB14 <- AddMetaData(HB14, metadata="G", col.name="HB")
HB15 <- AddMetaData(HB15, metadata="H", col.name="HB")
HB16 <- AddMetaData(HB16, metadata="I", col.name="HB")
HB17 <- AddMetaData(HB17, metadata="J", col.name="HB")
```
```{r}
HBbig.list <- list(HB1, HB4, HB6, HB7, HB12, HB13, HB14, HB15, HB16, HB17) 
```
```{r}
features <- SelectIntegrationFeatures(object.list = HBbig.list)
```
```{r}
HB.anchors <- FindIntegrationAnchors(object.list = HBbig.list, anchor.features = features)
```
```{r}
# this command creates an 'integrated' data assay
HB.combined <- IntegrateData(anchorset = HB.anchors)
```


```{r}
# specify that we will perform downstream analysis on the corrected data note that the original
# unmodified data still resides in the 'RNA' assay
DefaultAssay(HB.combined) <- "integrated"

# Run the standard workflow for visualization and clustering
HB.combined <- ScaleData(HB.combined, vars.to.regress = c("nCount_RNA", "percent.mito", "percent.ribo"), verbose = FALSE)
```
```{r}
HB.combined <- RunPCA(HB.combined, verbose = FALSE)
```
```{r}
HB.combined <- RunUMAP(HB.combined, reduction = "pca", dims = 1:30, set.seed(123))
HB.combined <- FindNeighbors(HB.combined, reduction = "pca", dims = 1:30)
HB.combined <- FindClusters(HB.combined, resolution = 0.4)
```

```{r}
# Visualization
DimPlot(HB.combined, reduction = "umap", label = TRUE, repel = TRUE)
```

```{r}
DimPlot(HB.combined, reduction = "umap", split.by = "HB", ncol = 4)
```
```{r}
# How do I create a UMAP plot where cells are colored by replicate?  First, store the current
# identities in a new column of meta.data called CellType
HB.combined$CellType <- Idents(HB.combined)
# Next, switch the identity class of all cells to reflect replicate ID
Idents(HB.combined) <- "seurat_clusters"
DimPlot(HB.combined, reduction = "umap", label = TRUE)
```
```
```{r}
p2 <- DimPlot(HB.combined, reduction = "umap", label = TRUE, repel = TRUE)
p2
```

```{r}
saveRDS(HB.combined, file = "/Users/pengwu/Documents/Experiments/scRNAseq/HBcombined.rds")
```

```{r}
DimPlot(object = HB.combined, reduction = 'umap', split.by = "HB", width = 2000, height = 1500, units = "px", res = 300)
tiff(file = "/Volumes/backups/Peng/Research/scRNAseq/HBcombined.tiff", width = 2000, height = 1500, units = "px", res = 300)
dev.off()
```

```{r}
# For performing differential expression after integration, we switch back to the original data
DefaultAssay(HB.combined) <- "RNA"
AllClust.markers <- FindAllMarkers(HB.combined, grouping.var = "HB", verbose = FALSE)
write.csv(AllClust.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/allclusters.csv",row.names = TRUE)
```
```{r}
cluster0.markers <- FindConservedMarkers(HB.combined, ident.1 = 0, grouping.var = "HB", verbose = FALSE)
write.csv(cluster0.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster0.csv",row.names = TRUE)
```
```{r}
cluster1.markers <- FindConservedMarkers(HB.combined, ident.1 = 1, grouping.var = "HB", verbose = FALSE)
write.csv(cluster1.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster1.csv",row.names = TRUE)
```
```{r}
cluster2.markers <- FindConservedMarkers(HB.combined, ident.1 = 2, grouping.var = "HB", verbose = FALSE)
write.csv(cluster2.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster2.csv",row.names = TRUE)
```
```{r}
cluster3.markers <- FindConservedMarkers(HB.combined, ident.1 = 3, grouping.var = "HB", verbose = FALSE)
write.csv(cluster3.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster3.csv",row.names = TRUE)
```
```{r}
cluster4.markers <- FindConservedMarkers(HB.combined, ident.1 = 4, grouping.var = "HB", verbose = FALSE)
write.csv(cluster4.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster4.csv",row.names = TRUE)
```
```{r}
cluster5.markers <- FindConservedMarkers(HB.combined, ident.1 = 5, grouping.var = "HB", verbose = FALSE)
write.csv(cluster5.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster5.csv",row.names = TRUE)
```
```{r}
cluster6.markers <- FindConservedMarkers(HB.combined, ident.1 = 6, grouping.var = "HB", verbose = FALSE)
write.csv(cluster6.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster6.csv",row.names = TRUE)
```
```{r}
cluster7.markers <- FindConservedMarkers(HB.combined, ident.1 =7, grouping.var = "HB", verbose = FALSE)
write.csv(cluster7.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster7.csv",row.names = TRUE)
```
```{r}
cluster8.markers <- FindConservedMarkers(HB.combined, ident.1 =8, grouping.var = "HB", verbose = FALSE)
write.csv(cluster8.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster8.csv",row.names = TRUE)
```
```{r}
cluster9.markers <- FindConservedMarkers(HB.combined, ident.1 =9, grouping.var = "HB", verbose = FALSE)
write.csv(cluster9.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster9.csv",row.names = TRUE)
```
```{r}
cluster10.markers <- FindConservedMarkers(HB.combined, ident.1 = 10, grouping.var = "HB", verbose = FALSE)
write.csv(cluster10.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster10.csv",row.names = TRUE)
```
```{r}
cluster11.markers <- FindConservedMarkers(HB.combined, ident.1 = 11, grouping.var = "HB", verbose = FALSE)
write.csv(cluster11.markers,"/Users/pengwu/Documents/Experiments/scRNAseq/cluster11.csv",row.names = TRUE)
```

```{r}
# Example for plotting gene features
FeaturePlot(object = HB.combined, features = c("FGF19", "KLB", "FGFR4"))
tiff(file = "FGF.tiff", width = 1500, height = 1500, units = "px", res = 300)
dev.off()
```

```{r}
# Example for calculating gene signatures
FetvNorm <- read.csv('/Users/pengwu/Documents/Experiments/RNAseq/180805Final/FetvNorm_Top50.csv', header=F, sep=",")
FetvNorm <- as.matrix(FetvNorm)
HB.combined <- AddModuleScore(HB.combined,
                  features = list(FetvNorm),
                  name="FetvNorm")
```
```{r}
# Plot scores
FeaturePlot(HB.combined,
            features = c("FetvNorm1"), label = FALSE, repel = TRUE) +
            scale_colour_gradientn(colours = rev(brewer.pal(n = 11, name = "RdBu")))
```
```{r}
# Example for violin plots
my_genes <- HB.combined[row.names(subset(fData(HB.combined), gene_short_name %in% c("AFP", "HNF4A", "KRT19", "SOX4"))),]

tiff(file = "AFPGenes_Violin.tiff", width = 1500, height = 1500, units = "px", res = 300)
p <- plot_genes_violin(my_genes, grouping = "cluster", ncol=2, min_expr=0.1,
  color_by = "cluster")
p + geom_jitter(shape=16, position=position_jitter(0.2))
dev.off()
```
```{r}
# Dot plots - the size of the dot corresponds to the percentage of cells expressing the
# feature in each cluster. The color represents the average expression level
features.plot <- c("FGF1", "FGF2", "FGF3", "FGF4", "FGF7", "FGF8", "FGF9", "FGF10", "FGF11", "FGF12", "FGF13", "FGF14", "FGF17", "FGF18", "FGF19", "FGF20", "FGF21", "FGF22", "FGF23")
DotPlot(HB.combined, features = features.plot, group.by = "HB") + RotatedAxis()
```

```{r}
# Dot plots - the size of the dot corresponds to the percentage of cells expressing the
# feature in each cluster. The color represents the average expression level
features.plot <- c("FGFR1", "FGFR2", "FGFR3", "FGFR4", "KLB", "EGFR", "MET", "IGF1R", "IGF2R", "INSR", "EGF", "HGF", "IGF1", "IGF2.1")
DotPlot(HB.combined, features = features.plot, group.by = "HB") + RotatedAxis()
```


```{r}
umapCoord <- as.data.frame(Embeddings(object = HB.combined[["umap"]]))
umapCoordmat <- as.matrix(umapCoord)
```

```{r}
# Monocle for pseudotime analyses
remotes::install_github('satijalab/seurat-wrappers')
library(monocle3)
library(Seurat)
library(SeuratWrappers)
library(patchwork)
library(dplyr)

set.seed(1234)
```
```{r}
HB.monocle <- as.cell_data_set(HB.combined)
HB.monocle <- cluster_cells(HB.monocle, cluster_method = "louvain")

p1 <- plot_cells(HB.monocle, color_cells_by = "cluster", show_trajectory_graph = FALSE)
p2 <- plot_cells(HB.monocle, color_cells_by = "partition", show_trajectory_graph = FALSE)
wrap_plots(p1, p2)
```
```{r}
integrated.sub <- subset(as.Seurat(HB.monocle, assay = NULL), monocle3_partitions == 1)
HB.monocle <- as.cell_data_set(integrated.sub)
```
```{r}
HB.monocle <- learn_graph(HB.monocle, use_partition = TRUE, verbose = FALSE)
```
```{r}
plot_cells(HB.monocle,
           color_cells_by = "cluster",
           label_groups_by_cluster=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE)
```
```{r}
HB.monocle <- order_cells(HB.monocle, root_cells = colnames(HB.monocle[,clusters(HB.monocle) == 47]))
plot_cells(HB.monocle,
           color_cells_by = "pseudotime",
           group_cells_by = "cluster",
           label_cell_groups = FALSE,
           label_groups_by_cluster=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE,
           label_roots = FALSE,
           trajectory_graph_color = "grey60")
```

Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Cmd+Option+I*.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Cmd+Shift+K* to preview the HTML file). 

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

